import psqloperation as psql
import jsbeautify
import rmcomment
import chunkJS
import malurl
import random
import numpy as np

import commands
import sys
import os
import fnmatch

# For path_checksum
import hashlib
from os.path import normpath, walk, isdir, isfile, dirname, basename, \
	exists as path_exists, join as path_join


def main(malClassList="/space/webcrawl-malware/data/TempData/URLs/top20.malurl.stats", numPerClass=1000, includeBenign=True):
	"""Build a sample list from a malware-class stats file and copy the samples.

	Each line of ``malClassList`` is comma separated, in the form
	``type,label,totalCount,numClass``. When a line has exactly four fields,
	the fourth one overrides ``numPerClass`` for that line only.

	Parameters:
		malClassList: path of the class stats file to read.
		numPerClass: how many names to keep per class when a line does not
			carry its own count.
		includeBenign: not read anywhere in the body yet; the original
			comment states that the benign class is included by default.

	Exits the process with status 1 when ``bSort`` is not one of
	``'Alexa'``, ``'Size'`` or ``'Random'``.

	NOTE(review): this function looks mid-refactor and cannot run to
	completion as written. ``malNames``, ``mcount``, ``bcount``, ``wTerry``
	and ``bSort`` are read without ever being assigned or passed in, and
	``GetSample`` is neither defined nor imported in the visible part of the
	file. They are left as they are because their intended source cannot be
	determined from this file -- TODO: wire them up.
	"""
	# Read the class list inside a with-block so the handle is always closed,
	# even when a line fails to parse.
	with open(malClassList, 'r') as malClasses:
		for line in malClasses:
			if not line.strip():
				# Tolerate blank lines (e.g. a trailing newline at end of file).
				continue
			fields = line.strip().split(',')
			# NOTE(review): malType is not used yet; presumably it is the key
			# for looking up malNames -- confirm.
			malType = fields[0]
			# Keep the per-line override local, so that it does not leak into
			# later lines that rely on the numPerClass default.
			if len(fields) == 4:
				classCount = int(fields[3])
			else:
				classCount = numPerClass
			# NOTE(review): malNames is never populated before this point.
			# random.shuffle works in place and returns None, so its result
			# must not be assigned back.
			random.shuffle(malNames)
			malNames = malNames[0:classCount]
			# NOTE(review): this per-class sample is overwritten on the next
			# iteration and again right after the loop.
			sample = GetSample(malNames)

	sample = GetSample()
	if mcount is None:
		# The names are built from the total count, which is then split in
		# two; the benign side receives the remainder of an odd total.
		samplefile = '{0}SamplesWTerryList'.format(bcount)
		outputdir = '/space/webcrawl-malware/data/{0}SamplesWTerry/'.format(bcount)
		# Floor division keeps the counts integral on every interpreter.
		mcount = bcount // 2
		bcount = bcount - mcount
	else:
		samplefile = '{0}B{1}MSamplesWTerryList'.format(bcount, mcount)
		outputdir = '/space/webcrawl-malware/data/{0}B{1}MSamplesWTerry/'.format(bcount, mcount)

	# Check whether to include Terry's examples.
	if wTerry:
		sample.jslistNoTerry()
		# NOTE(review): 56 is presumably the number of Terry's examples that
		# copyTerry adds further down -- confirm.
		mcount = mcount - 56
	else:
		samplefile = '{0}B{1}MSamplesList'.format(bcount, mcount)
		outputdir = '/space/webcrawl-malware/data/{0}B{1}MSamples/'.format(bcount, mcount)

	# Check whether bSort is valid
	if bSort not in ('Alexa', 'Size', 'Random'):
		# A single parenthesised argument prints the same on Python 2 and 3.
		print('Unexpected Value for bSort!\nOnly Alexa, Size and Random are Accepted.')
		sys.exit(1)

	# Generate sample list
	sample.sampleList(maliciouscount=mcount, benigncount=bcount, outfile=samplefile, benignsort=bSort)
	GetSample().copySampleList(samplefile=samplefile, outdir=outputdir)

	if wTerry:
		# copySampleList will create the directory if it does not exist
		GetSample().copyTerry(jsdir='/space/webcrawl-malware/data/JSAdd2Tree/', outdir=outputdir)


# Script entry point: run main() with its default arguments when this file is
# executed directly rather than imported.
if __name__ == "__main__":
	main()
